import os
import re
import csv
import pandas as pd


def data_statistics():
    """Summarise every filtered CSV in ``dataWash`` into ``数据统计.csv``.

    For each ``.csv`` file in the ``dataWash`` folder one row is appended to
    ``数据统计.csv`` containing: the file's base name without extension (used
    as the province/city name), the number of rows, the sum of the
    ``招录人数`` column, and the number of rows whose ``考试科目`` column
    mentions each of 英语（一）, 英语（二）, 数学（一） and 数学（二）.

    The header row is written only when the output file does not exist yet
    or is empty, so repeated runs no longer scatter header rows through the
    data. Input files are processed in sorted name order.

    Raises:
        FileNotFoundError: if the ``dataWash`` folder does not exist.
        KeyError: if an input file has no ``考试科目`` column.
    """
    folder_path = 'dataWash'  # folder holding the filtered CSV files
    out_path = '数据统计.csv'
    subjects = ['英语（一）', '英语（二）', '数学（一）', '数学（二）']

    # Sort so the row order does not depend on os.listdir's arbitrary order.
    csv_files = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path) if f.endswith('.csv'))

    # Appending is kept from the original, but the header must go in once.
    need_header = (not os.path.exists(out_path)
                   or os.path.getsize(out_path) == 0)

    # NOTE(review): the file is opened with the platform default encoding,
    # exactly as before; confirm whether consumers expect GBK or UTF-8.
    with open(out_path, 'a', newline='') as f:
        csv_write = csv.writer(f)
        if need_header:
            csv_write.writerow(['省/市', '学院总数', '招生总人数'] + subjects)

        for file in csv_files:
            data = pd.read_csv(file, header=0)

            # basename/splitext work with both '/' and '\\' separators; the
            # old split('\\') left the folder in the name on POSIX systems.
            province_name = os.path.splitext(os.path.basename(file))[0]

            students = pd.DataFrame(data, columns=['招录人数']).sum()['招录人数']
            college_count = len(data)  # number of rows after filtering

            exam_subjects = data['考试科目'].astype(str)
            # The subject names contain no regex metacharacters, so a
            # literal substring match gives the same result as the default.
            subject_counts = [
                int(exam_subjects.str.contains(subject, regex=False).sum())
                for subject in subjects
            ]

            csv_write.writerow(
                [province_name, college_count, students] + subject_counts)


def data_wash():
    """Filter the raw CSV files in ``data`` and store them in ``dataWash``.

    Every ``.csv`` file in the ``data`` folder is read as GBK text with the
    column names taken from the second line. Only rows whose ``考试科目``
    column mentions 数据结构, 计算机 or 程序设计 are kept, and the result is
    written to the ``dataWash`` folder, which is created if missing.

    The output file name is the text that follows the ``)`` closing the last
    ``(`` in the input path. Input files without such a parenthesised part
    keep their own base name instead of raising ``IndexError``.

    Raises:
        FileNotFoundError: if the ``data`` folder does not exist.
        KeyError: if an input file has no ``考试科目`` column.
    """
    folder_path = 'data'  # folder holding the raw CSV files
    out_folder_path = 'dataWash'  # folder receiving the filtered files

    # to_csv does not create missing folders, so make sure it exists.
    os.makedirs(out_folder_path, exist_ok=True)

    csv_files = [os.path.join(folder_path, f) for f in os.listdir(
        folder_path) if f.endswith('.csv')]

    for file in csv_files:
        data = pd.read_csv(file, encoding='GBK', header=1)

        # Keep only the rows whose exam subjects match one of the keywords.
        keep = data['考试科目'].astype(str).str.contains(
            r'数据结构|计算机|程序设计')
        data_obj = data.loc[keep, :]

        # Same naming rule as before when the path has a "(...)" part;
        # otherwise fall back to the input file's own name.
        name_parts = file.split('(')[-1].split(')')
        if len(name_parts) > 1 and name_parts[1]:
            out_file_name = name_parts[1]
        else:
            out_file_name = os.path.basename(file)

        out_file = os.path.join(out_folder_path, out_file_name)
        data_obj.to_csv(out_file)


# Script entry point: the statistics step reads the 'dataWash' folder that
# the filtering step writes, so the two calls must run in this order.
if __name__ == '__main__':
    data_wash()  # filter the raw data
    data_statistics()  # compute the per-file statistics
